import os
from time import sleep

import requests
from bs4 import BeautifulSoup
# Fetch the ranking pages and feed each one to parseHtml.
def getHtmlPage():
    """Request pages 10-29 of the Sohu NBA player scoring ranking.

    Side effects only: each successfully fetched page is handed to
    parseHtml(), which writes a text file per page. Returns None.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}

    for i in range(10, 30):
        url = 'http://data.sports.sohu.com/nba/nba_players_rank.php?order_by=points&spm=smpc.fb-nba-home.top-dc.2.1620824904040nf6byr7/{}.html'.format(i)
        # timeout keeps one dead connection from hanging the whole crawl
        resp = requests.get(url=url, headers=headers, timeout=10)
        resp.encoding = 'utf-8'
        # Best-effort: skip error responses instead of parsing an error page as data.
        if resp.status_code != 200:
            print('skip page {}: HTTP {}'.format(i, resp.status_code))
            continue
        parseHtml(resp.text)

def parseHtml(pageText):
    """Extract the page title and all text under div.grap, then save to a file.

    Args:
        pageText: raw HTML of one ranking page.

    Writes '球员信息\\<title>.txt' in UTF-8; creates the output directory
    if it does not exist yet.
    """
    sleep(1)  # polite delay between successive page fetches
    soup = BeautifulSoup(pageText, 'lxml')
    title = soup.title.text
    contentList = [title]
    # every div directly under class="grap" holds one chunk of player data
    for div in soup.select('.grap > div'):
        contentList.append(div.text)
    # original crashed with FileNotFoundError when the directory was missing
    os.makedirs('球员信息', exist_ok=True)
    # 'with' guarantees the handle is closed even if writelines raises
    with open('球员信息\\' + title + '.txt', 'w', encoding='utf-8') as fo:
        fo.writelines(contentList)
    print(title + '页面爬取完毕')
    

if __name__ == '__main__':
    # getHtmlPage() parses every page itself and returns None; the original
    # follow-up call parseHtml(pageText) passed None into BeautifulSoup and
    # crashed, so the crawl is driven by this single call.
    getHtmlPage()
